home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PsL Monthly 1993 December
/
PSL Monthly Shareware CD-ROM (December 1993).iso
/
prgmming
/
dos
/
pascal
/
rle.pas
< prev
next >
Wrap
Pascal/Delphi Source File
|
1993-02-03
|
17KB
|
479 lines
{$D+,L+} {debug info}
{$M 8192,0,655360} {stack size, heapmin, heapmax}
PROGRAM RunLengthEncode;
{
Author: Ken Murphy, CIS 74025,731
Date Written: January 1993
This program implements a Run-Length-Encoding algorithm to compress files.
I wrote this to test the decompression speed and compression effect on files
containing printer graphics. These files tend to be large with long series
repetitive characters. RLE is a bad compression choice if the file to be
compressed does not have runs of repetitive bytes. The algorithm gains
it's compression from reducing a series of the same byte (up to 127 of them)
to 2 bytes.
Tests have proven that this RLE decompression is very fast, and it's OK to
include this technique within other applications that employ files that can
benefit. Tests on my dot matrix printer graphic data files show compressions
of about 65% of original size. LZHuf compresses better, but this is faster
and simpler to develop for my needs.
The program has 3 command-line inputs in the order given:
1. Mode: C for compress or D for Decompress
2. Input file name
3. Output file name.
The encoded data file is a series of strings. Each string begins with a
"prefix" byte. The leftmost bit of the byte distinguishes between the two
types of strings. The remaining 7 bits in the prefix byte are a length value.
The prefix byte is followed by one or more data bytes. The two types of
compressed strings are "RUN" and "LEN". The RUN variety allows for sequences
of identical bytes. Generally, the maximum compression effect results from
these. The LEN variety accomodates sequences of data bytes that are not the
same.
Here's what it looks like in a compressed file:
If the leftmost bit of the prefix byte is 1, the string is a "RUN" string.
The remaining 7 bits of the prefix byte are the replication count value
for the following data byte. Therefore, a RUN string is always 2 bytes.
If the leftmost bit of the prefix byte is 0, the string is a "LENGTH"
string. The remaining 7 bits of the prefix byte are the number of
data bytes following the prefix byte.
Since the length value in the prefix byte is limited to 7 bits, a RUN or a
LENGTH can be no larger that 127 ($01 - $FF). I don't permit a length value
of 0. PASCAL will handle this type of byte as a SHORTINT. Remember that the
actual size of a RUN segment is always 2 bytes. A LEN segment never gets
larger than 128 bytes, i.e. 1 prefix byte and 127 (max) data bytes.
There's performance instrumentation kept and displayed so I can measure the
algorithm and how I've coded it.
The program was developed in Borland Pascal 7.0 for a REAL target. It uses
TurboPower's Object Professional, but there's no "Object Oriented"
programming here. There's not much in the way of "fancy" so it should be
easy to modify to vanilla Pascal.
Also, consider this a blatant advertisement and recommendation for TurboPower's
Object Professional library although I'm in no way affiliated with them.}
USES DOS, OPCrt, OPString;
CONST
MaxChars = 127;
EndOfFile = 100;
BufSize = 255;
TYPE
RLE = RECORD
RepeatCount : shortint;
Data : array[1..MaxChars] OF char;
END;
RLE_Rec_String= array[0..MaxChars] OF char;
SegmentTypes = (RUN, LEN);
TimeRecord = RECORD
Hour : Word;
Minute : Word;
Second : Word;
Sec100 : Word;
END;
CONST {typed CONST}
ReadResult : word = 0;
WriteCount : word = 0;
Segments : longint = 0;
TotalBytesIn : longint = 0;
TotalBytesOut : longint = 0;
EndInput : boolean = false;
DisplayPrefix : string[2] = ' ';
InputFileSize : real = 0;
LastIBuffer : boolean = false;
LastOBuffer : boolean = false;
OBufferIndex : integer = BufSize+1;
OBuffer : string[BufSize] = '';
IBuffer : string[BufSize] = '';
IBufferIndex : integer = Bufsize+1;
VAR
SegmentType : SegmentTypes;
RLE_Record : RLE;
RLE_String : RLE_Rec_String absolute RLE_Record;
CurrentChar : char;
LastChar : char;
InputFile : FILE;
OutputFile : FILE;
CompressMode : boolean;
InputFileName : string;
OutputFileName: string;
OldPct : string[7];
Started : TimeRecord;
Stopped : TimeRecord;
{*************************************************************}
PROCEDURE ShowProgress;
VAR
Percent : string[7];
{show the user what percentage of the input we've done - eye candy}
BEGIN
Str((TotalBytesIn * 100) / InputFileSize : 6 : 1, Percent);
IF OldPct <> Percent THEN {this speeds things a bit}
BEGIN
OldPct:=Percent;
FastText('Percentage Completed = ' + Percent, 6, 2);
END;
END; {ShowProgress}
{**************************************************************}
PROCEDURE ReadInput (CONST ByteCount : shortint;
VAR ReadString : RLE_Rec_String);
{feed BYTECOUNT bytes from the input file. A string buffer is used to hold
the file's data to minimize the I/O time spent in the file. The output is a
0-index-based char array.}
VAR
BCount : shortint;
BEGIN
IF IBufferIndex+ByteCount-1<=ReadResult THEN {more data in buffer?}
BEGIN
IF ByteCount=1 THEN
ReadString[0]:=IBuffer[IBufferIndex]
ELSE
Move (IBuffer[IBufferIndex],ReadString[0],ByteCount);
IBufferIndex := IBufferIndex + ByteCount;
END
ELSE
BEGIN {no, get more from file}
IF LastIBuffer THEN
EndInput:=true {last buffer was delivered}
ELSE
BEGIN {read another block}
BCount:=ByteCount;
{if there's anything in the buffer, output the remainder as the 1st
chunk of the data requested}
IF IBufferIndex<=BufSize THEN {was the entire buffer used}
BEGIN {no, there's more in there to give}
IF IBufferIndex=BufSize THEN
ReadString[0]:=IBuffer[BufSize]
ELSE
Move(IBuffer[IBufferIndex],ReadString[0],Length(IBuffer)-IBufferIndex+1);
BCount:=BCount-(BufSize-IBufferIndex+1);
END; {buffer index <= buffer size}
{$I-} BlockRead (InputFile, IBuffer[1], BufSize, ReadResult); {$I+}
IF (IOResult = EndOfFile) OR (ReadResult<BufSize) THEN
LastIBuffer:=true; {last buffer has been input}
IF ReadResult>0 THEN
BEGIN
IBuffer[0]:=Chr(ReadResult); {set length of string}
IF BCount=1 THEN {Asking for only 1 byte}
ReadString[ByteCount-BCount]:=IBuffer[1] {yes, faster than 1 byte MOVE}
ELSE
Move(IBuffer[1],ReadString[ByteCount-BCount],BCount);
TotalBytesIn:=TotalBytesIn + ReadResult;
IBufferIndex := BCount+1;
END; {result>0}
END; {input another block}
END; {else}
END; {ReadRaw}
{*************************************************************}
PROCEDURE Compress;
{Compress the input file by reading it one byte at a time and comparing
that byte to the one previously read.}
{================================================}
PROCEDURE NewSegment (CONST StringType : SegmentTypes);
{set the appropriate prefix type; write the string segment and adjust
the statistics counters. Lastly insert the latest byte and reset the
prefix count to 1}
BEGIN
CASE StringType OF
LEN :
BEGIN {prefix byte remains positive < 128}
BlockWrite (OutputFile, RLE_Record, RLE_Record.RepeatCount + 1);
TotalBytesOut:=TotalBytesOut + RLE_Record.RepeatCount + 1;
END; {LEN}
RUN :
BEGIN {flip the sign of the prefix byte}
RLE_Record.RepeatCount:=RLE_Record.RepeatCount * -1;
BlockWrite (OutputFile, RLE_Record, 2);
TotalBytesOut:=TotalBytesOut + 2;
END; {RUN}
END; {CASE}
Segments:=Segments+1;
RLE_Record.Data[1]:=CurrentChar; {set new byte to compare}
RLE_Record.RepeatCount:=1; {only one of them so far}
END; {NewSegment}
{================================================}
FUNCTION ReadRaw : char;
{feed one byte from the input file. A string buffer is used to hold
the file's data to minimize the I/O time spent in the file.}
BEGIN
IF IBufferIndex<=ReadRes